package net.tngou.tpage.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.configuration2.PropertiesConfiguration;
import org.apache.commons.io.FileUtils;

/**
 * Thin wrapper around the Tesseract command-line OCR tool.
 *
 * <p>The Tesseract installation directory is read once, at class-load time, from the
 * {@code tesseract_home} key of {@code sys.properties}.
 */
public class Tesseract_OCR {
	/** Tesseract installation directory (value of {@code tesseract_home} in sys.properties). */
	private static final String TESSERACT_HOME;
	/** Command-line switch that selects the recognition language(s). */
	private final static String LANG_OPTION = "-l";
	/** Languages to recognise; Tesseract expects several languages joined with '+'. */
	private final static String LANGUAGES = "chi_sim+eng";
	/** Base name of the temporary result file; Tesseract appends ".txt" to it. */
	private final static String OUTPUT_BASE = "output";
	/** Tesseract writes its plain-text result as UTF-8. */
	private final static String OUTPUT_ENCODING = "UTF-8";
	private final static String EOL = System.getProperty("line.separator");

	static {
		PropertiesConfiguration config = ConfigUtil.GetConfig("sys.properties");
		TESSERACT_HOME = config.getString("tesseract_home");
	}

	/**
	 * Runs Tesseract on the given image and returns the recognised text.
	 *
	 * <p>The process is started in the image's directory and writes its result to a
	 * temporary {@code output.txt} file there, which is deleted after being read.
	 * Because the file name is fixed, concurrent calls on images in the same directory
	 * would overwrite each other's result.
	 *
	 * <p>This method is best-effort: it never throws. Any failure (missing binary,
	 * non-zero exit code, unreadable result, interruption) is reported on
	 * {@code System.err} and an empty string is returned.
	 *
	 * @param imageFile the image to recognise; {@code null} yields an empty string
	 * @return the recognised text, or {@code ""} if recognition failed
	 */
	public static String GetStringImg(File imageFile) {
		if (imageFile == null) {
			System.err.println("Tesseract OCR skipped: image file is null.");
			return "";
		}

		// A null working directory makes ProcessBuilder use the current directory,
		// which is also what the relative File paths below resolve against.
		File workDir = imageFile.getParentFile();
		File outputBase = new File(workDir, OUTPUT_BASE);
		File resultFile = new File(outputBase.getAbsolutePath() + ".txt");

		// File names are passed relative to the working directory.
		List<String> cmd = new ArrayList<String>();
		cmd.add(TESSERACT_HOME + File.separator + "tesseract");
		cmd.add(imageFile.getName());
		cmd.add(outputBase.getName());
		cmd.add(LANG_OPTION);
		cmd.add(LANGUAGES);

		Process process = null;
		try {
			ProcessBuilder pb = new ProcessBuilder(cmd);
			pb.directory(workDir);
			pb.redirectErrorStream(true);
			process = pb.start();
			System.out.println(cmd.toString());

			// The merged stdout/stderr must be consumed before waiting; otherwise the
			// child can block on a full pipe buffer and waitFor() would never return.
			String processOutput = drainOutput(process);
			int exitCode = process.waitFor();
			if (exitCode != 0) {
				System.err.println("Tesseract OCR failed for " + imageFile + " (exit code " + exitCode
						+ "): " + describeExitCode(exitCode) + EOL + processOutput);
				return "";
			}

			try {
				return FileUtils.readFileToString(resultFile, OUTPUT_ENCODING);
			} finally {
				// Remove the temporary result even when reading it fails.
				resultFile.delete();
			}
		} catch (InterruptedException e) {
			// Restore the interrupt flag for the caller and do not leave the child running.
			Thread.currentThread().interrupt();
			process.destroy();
			System.err.println("Tesseract OCR interrupted for " + imageFile);
			return "";
		} catch (Exception e) {
			System.err.println("Tesseract OCR failed for " + imageFile + ": " + e);
			return "";
		}
	}

	/** Reads the process's (merged) output until end of stream and returns it as text. */
	private static String drainOutput(Process process) throws IOException {
		StringBuilder captured = new StringBuilder();
		BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()));
		try {
			String line;
			while ((line = reader.readLine()) != null) {
				captured.append(line).append(EOL);
			}
		} finally {
			reader.close();
		}
		return captured.toString();
	}

	/** Maps a non-zero Tesseract exit code to a human-readable explanation. */
	private static String describeExitCode(int exitCode) {
		switch (exitCode) {
		case 1:
			return "Errors accessing files. There may be spaces in your image's filename.";
		case 29:
			return "Cannot recognize the image or its selected region.";
		case 31:
			return "Unsupported image format.";
		default:
			return "Errors occurred.";
		}
	}

	/**
	 * Manual smoke test: recognises one image and prints the text to {@code System.err}.
	 *
	 * @param args optional; {@code args[0]} is the image path, otherwise a built-in
	 *             sample path is used
	 */
	public static void main(String[] args) throws IOException, InterruptedException {
		String imagePath = args.length > 0 ? args[0] : "D:\\MyWork\\Tesseract-OCR\\doc.png";
		File imageFile = new File(imagePath);
		System.err.println(GetStringImg(imageFile));
	}

}
